by Jason Kim
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
%matplotlib inline
pd.set_option('display.max_rows', None)
import plotly.offline as py
import plotly.graph_objects as go
import plotly.express as px
from IPython.display import HTML,display,display_html
df = pd.read_csv(r'data\FIFA-21 Complete.csv',sep=';', index_col='player_id')
# in the FIFA21 player database, the team names had a space at the end: truncated.
df['team'] = df['team'].str.rstrip()
team_stat = df.groupby(['team'])['overall'].mean().sort_values(ascending=False)
team_stat.head(15).to_frame().style.background_gradient(cmap="magma")
| overall | |
|---|---|
| team | |
| Juventus | 83.043478 |
| FC Bayern München | 81.565217 |
| Napoli | 79.730769 |
| Real Madrid | 79.606061 |
| Paris Saint-Germain | 78.866667 |
| Inter | 78.576923 |
| FC Barcelona | 78.090909 |
| Bayer 04 Leverkusen | 77.916667 |
| Manchester City | 76.875000 |
| SL Benfica | 76.600000 |
| Lazio | 76.500000 |
| Liverpool | 76.454545 |
| Atlético Madrid | 76.424242 |
| Milan | 76.040000 |
| Manchester United | 76.030303 |
best_itw=df.loc[df['overall'] > 84 ].sort_values(by = ['overall'],ascending=False)
best_itw['team'].value_counts().head(15).to_frame().style.background_gradient(cmap="twilight")
| team | |
|---|---|
| FC Barcelona | 12 |
| Real Madrid | 11 |
| Manchester City | 11 |
| Liverpool | 10 |
| Paris Saint-Germain | 9 |
| Juventus | 8 |
| FC Bayern München | 8 |
| Inter | 6 |
| Tottenham Hotspur | 5 |
| Borussia Dortmund | 5 |
| Atlético Madrid | 4 |
| Napoli | 3 |
| Manchester United | 3 |
| Lazio | 3 |
| Arsenal | 2 |
# Average Position Analysis
# Data pulled from Wikipedia
# (not many sites and data sources keep track of teams' mid-season league positions)
links = ["https://en.wikipedia.org/wiki/2009%E2%80%9310_Manchester_United_F.C._season","https://en.wikipedia.org/wiki/2010%E2%80%9311_Manchester_United_F.C._season","https://en.wikipedia.org/wiki/2011%E2%80%9312_Manchester_United_F.C._season","https://en.wikipedia.org/wiki/2012%E2%80%9313_Manchester_United_F.C._season","https://en.wikipedia.org/wiki/2013%E2%80%9314_Manchester_United_F.C._season","https://en.wikipedia.org/wiki/2014%E2%80%9315_Manchester_United_F.C._season","https://en.wikipedia.org/wiki/2015%E2%80%9316_Manchester_United_F.C._season","https://en.wikipedia.org/wiki/2016%E2%80%9317_Manchester_United_F.C._season","https://en.wikipedia.org/wiki/2017%E2%80%9318_Manchester_United_F.C._season","https://en.wikipedia.org/wiki/2018%E2%80%9319_Manchester_United_F.C._season","https://en.wikipedia.org/wiki/2019%E2%80%9320_Manchester_United_F.C._season"]
league_pos = []
def league_pos_extractor(link,league):
manu_df = pd.read_html(link)
if len(manu_df[4]) ==38:
positions = manu_df[4].iloc[:,-1].values.tolist()
elif link == "https://en.wikipedia.org/wiki/2018%E2%80%9319_Manchester_United_F.C._season":
positions = manu_df[3].iloc[:,-2].values.tolist()
else:
positions = manu_df[3].iloc[:,-1].values.tolist()
league.append(positions)
for link in links:
league_pos_extractor(link,league_pos)
agg_lg_pos = pd.DataFrame(league_pos)
df2 = agg_lg_pos.T
df2.columns = ["09/10","10/11","11/12","12/13","13/14","14/15","15/16","16/17","17/18","18/19","19/20"]
for col in df2:
if col == "15/16":
df2[col] = df2[col].str[0]
else:
df2[col] = df2[col].str[:-2]
df2[col] = pd.to_numeric(df2[col])
df2.index = range(1,39)
avg_pos = []
for col in df2:
avg_pos.append((np.mean(df2[col])))
pd.options.plotting.backend = "plotly"
fig = df2.plot(labels = dict(index="Gameweek",value="League Position",variable="season",line_shape="hv"))
fig['layout']['yaxis']['autorange'] = "reversed"
fig.update_layout(title='Man UTD league position by GW since 09/10')
fig.show(include_plotlyjs=False,output_type='div')
years = df2.columns
# fig2 = go.Figure(data=[go.Table(header=dict(values=['Season','Average Position', 'Final Position']),
# cells=dict(values=[df2.columns,avg_pos, [2,1,2,1,7,4,5,6,2,6,3]]))
# ])
fig2 = go.Figure()
fig2.add_trace(go.Scatter(x=years, y=avg_pos,
mode='lines+markers',
name='Avg Position'))
fig2.add_trace(go.Scatter(x=years, y= [2,1,2,1,7,4,5,6,2,6,3],
mode='lines+markers',
name='Final Position'))
fig2.update_layout(title='Avg Position vs Actual Final Position')
fig2['layout']['yaxis']['autorange'] = "reversed"
fig2.show()
pl_champs = ['Liverpool','Chelsea','Manchester City','Leicester City', 'Manchester United']
ratings = [[],[],[],[],[]]
df15 = pd.read_csv(r'data\players_15.csv')
df16 = pd.read_csv(r'data\players_16.csv')
df17 = pd.read_csv(r'data\players_17.csv')
df18 = pd.read_csv(r'data\players_18.csv')
df19 = pd.read_csv(r'data\players_19.csv')
df20 = pd.read_csv(r'data\players_20.csv')
def champ_mean(plchamps,df,lists):
for champ,i in zip(plchamps,range(5)):
lists[i].append(df.loc[df['club'] == champ ]['overall'].mean())
dfs = [df15,df16,df17,df18,df19,df20]
for df in dfs:
champ_mean(pl_champs,df,ratings)
fig3 = go.Figure()
x = years[5:]
for i in range(5):
fig3.add_trace(go.Scatter(x=x, y=ratings[i], name=pl_champs[i],
line_shape='linear'))
fig3.update_traces(hoverinfo='text+name', mode='lines+markers')
fig3.update_layout(legend=dict(y=0.5, traceorder='reversed', font_size=16))
fig3.update_layout(title='Mean FIFA ratings across top PL teams')
def display_side_by_side(*args):
html_str=''
for df in args:
html_str+=df.to_html()
display_html(html_str.replace('table','table style="display:inline"'),raw=True)
mu_16_17=df17.loc[df17['club']=="Manchester United"].sort_values(by = ['overall'],ascending=False)
mu_16_17 = mu_16_17[['short_name','overall']].style.background_gradient(cmap="Reds").set_table_attributes("style='display:inline'").set_caption('16/17 Man Utd (6th) ')
mc_16_17=df17.loc[df17['club']=="Manchester City"].sort_values(by = ['overall'],ascending=False)
mc_16_17 = mc_16_17[['short_name','overall']].style.background_gradient(cmap="cool").set_table_attributes("style='display:inline'").set_caption('Man City (3rd)')
cfc_16_17=df17.loc[df17['club']=="Chelsea"].sort_values(by = ['overall'],ascending=False)
cfc_16_17 = cfc_16_17[['short_name','overall']].style.background_gradient(cmap="Blues").set_table_attributes("style='display:inline'").set_caption('Chelsea (1st)')
spurs_16_17=df17.loc[df17['club']=="Tottenham Hotspur"].sort_values(by = ['overall'],ascending=False)
spurs_16_17 = spurs_16_17[['short_name','overall']].style.background_gradient(cmap="gray").set_table_attributes("style='display:inline'").set_caption('Spurs (2nd)')
display_html(mu_16_17._repr_html_()+cfc_16_17._repr_html_()+spurs_16_17._repr_html_()+mc_16_17._repr_html_(),raw=True)
| short_name | overall | |
|---|---|---|
| 5 | De Gea | 90 |
| 6 | Z. Ibrahimović | 90 |
| 16 | P. Pogba | 88 |
| 81 | H. Mkhitaryan | 85 |
| 105 | C. Smalling | 84 |
| 111 | W. Rooney | 84 |
| 121 | Juan Mata | 84 |
| 153 | B. Schweinsteiger | 83 |
| 190 | A. Martial | 82 |
| 191 | E. Bailly | 82 |
| 215 | M. Schneiderlin | 82 |
| 226 | Ander Herrera | 82 |
| 270 | L. Shaw | 81 |
| 313 | D. Blind | 81 |
| 337 | M. Carrick | 81 |
| 481 | A. Valencia | 80 |
| 430 | M. Darmian | 80 |
| 388 | M. Depay | 80 |
| 569 | P. Jones | 79 |
| 641 | A. Young | 79 |
| 667 | S. Romero | 79 |
| 794 | M. Rojo | 78 |
| 892 | M. Fellaini | 78 |
| 972 | J. Lingard | 77 |
| 1236 | M. Rashford | 76 |
| 3900 | T. Fosu-Mensah | 71 |
| 6386 | S. Johnstone | 68 |
| 7313 | A. Tuanzebe | 67 |
| 10340 | J. Riley | 64 |
| 11278 | R. Poole | 63 |
| 14587 | S. Goss | 58 |
| 15124 | S. El Fitouri | 57 |
| short_name | overall | |
|---|---|---|
| 10 | T. Courtois | 89 |
| 20 | E. Hazard | 88 |
| 53 | Cesc Fàbregas | 86 |
| 66 | Diego Costa | 85 |
| 78 | Willian | 85 |
| 107 | J. Terry | 84 |
| 125 | David Luiz | 84 |
| 126 | Azpilicueta | 84 |
| 127 | N. Matić | 84 |
| 136 | Oscar | 83 |
| 166 | G. Cahill | 83 |
| 169 | A. Begović | 83 |
| 184 | Pedro | 83 |
| 281 | M. Batshuayi | 81 |
| 292 | N. Kanté | 81 |
| 497 | B. Ivanović | 80 |
| 383 | K. Zouma | 80 |
| 864 | J. Mikel | 78 |
| 964 | M. van Ginkel | 77 |
| 1013 | V. Moses | 77 |
| 1021 | Marcos Alonso | 77 |
| 1507 | Eduardo | 76 |
| 4766 | T. Kane | 70 |
| 5456 | D. Solanke | 69 |
| 6358 | N. Chalobah | 68 |
| 6344 | R. Loftus-Cheek | 68 |
| 11300 | O. Aina | 63 |
| 12802 | F. Tomori | 61 |
| 14058 | J. Dasilva | 59 |
| 14536 | K. Scott | 58 |
| 14997 | M. Mount | 57 |
| 15087 | B. Collins | 57 |
| 15764 | T. Chalobah | 55 |
| short_name | overall | |
|---|---|---|
| 27 | H. Lloris | 88 |
| 67 | T. Alderweireld | 85 |
| 85 | H. Kane | 84 |
| 87 | C. Eriksen | 84 |
| 170 | J. Vertonghen | 83 |
| 247 | M. Dembélé | 82 |
| 284 | E. Lamela | 81 |
| 296 | K. Walker | 81 |
| 382 | D. Alli | 80 |
| 428 | D. Rose | 80 |
| 459 | M. Vorm | 80 |
| 505 | M. Sissoko | 80 |
| 565 | K. Wimmer | 79 |
| 761 | B. Davies | 78 |
| 756 | H. Son | 78 |
| 751 | V. Wanyama | 78 |
| 742 | G. Nkoudou | 78 |
| 731 | V. Janssen | 78 |
| 716 | E. Dier | 78 |
| 1014 | K. Trippier | 77 |
| 2656 | Pau López | 73 |
| 3334 | T. Carroll | 72 |
| 4619 | J. Onomah | 70 |
| 10413 | H. Winks | 64 |
| 12117 | C. Carter-Vickers | 62 |
| 13471 | S. Harrison | 60 |
| 14519 | M. Edwards | 58 |
| 14543 | K. Walker-Peters | 58 |
| 14646 | L. Amos | 58 |
| 14662 | C. Ogilvie | 58 |
| 15067 | C. Maghoma | 57 |
| 15441 | A. Walkes | 56 |
| 16584 | H. Voss | 52 |
| short_name | overall | |
|---|---|---|
| 12 | S. Agüero | 89 |
| 18 | K. De Bruyne | 88 |
| 38 | David Silva | 87 |
| 49 | V. Kompany | 86 |
| 65 | I. Gündoğan | 85 |
| 76 | C. Bravo | 85 |
| 80 | N. Otamendi | 85 |
| 108 | Y. Touré | 84 |
| 188 | Nolito | 83 |
| 196 | R. Sterling | 82 |
| 237 | P. Zabaleta | 82 |
| 248 | B. Sagna | 82 |
| 345 | Fernandinho | 81 |
| 467 | Jesús Navas | 80 |
| 471 | G. Clichy | 80 |
| 689 | A. Kolarov | 79 |
| 687 | Fernando | 79 |
| 527 | L. Sané | 79 |
| 710 | J. Stones | 78 |
| 782 | F. Delph | 78 |
| 836 | W. Caballero | 78 |
| 2078 | K. Iheanacho | 74 |
| 8365 | T. Ambrose | 66 |
| 9339 | Pablo Maffeo | 65 |
| 9352 | Angeliño | 65 |
| 9367 | A. Gunn | 65 |
| 10320 | Brahim Díaz | 64 |
| 11269 | Aleix García | 63 |
| 12784 | C. Humphreys-Grant | 61 |
| 12874 | I. Lawlor | 61 |
| 13445 | W. Patching | 60 |
| 13508 | A. Nemane | 60 |
| 14076 | T. Adarabioyo | 59 |
liv_19_20=df20.loc[df20['club']=="Liverpool"].sort_values(by = ['overall'],ascending=False)
liv_19_20 = liv_19_20[['short_name','overall']].style.background_gradient(cmap="Reds").set_table_attributes("style='display:inline'").set_caption('19/20 Liverpool (1st) ')
manu_19_20=df20.loc[df20['club']=="Manchester United"].sort_values(by = ['overall'],ascending=False)
manu_19_20 = manu_19_20[['short_name','overall']].style.background_gradient(cmap="Reds").set_table_attributes("style='display:inline'").set_caption('19/20 Man Utd (3rd) ')
display_html(liv_19_20._repr_html_()+manu_19_20._repr_html_(),raw=True)
| short_name | overall | |
|---|---|---|
| 7 | V. van Dijk | 90 |
| 9 | M. Salah | 90 |
| 13 | Alisson | 89 |
| 39 | S. Mané | 88 |
| 63 | Roberto Firmino | 86 |
| 81 | Fabinho | 85 |
| 82 | A. Robertson | 85 |
| 139 | G. Wijnaldum | 84 |
| 160 | T. Alexander-Arnold | 83 |
| 218 | J. Henderson | 83 |
| 243 | N. Keïta | 82 |
| 260 | J. Matip | 82 |
| 313 | X. Shaqiri | 82 |
| 371 | J. Milner | 81 |
| 451 | A. Oxlade-Chamberlain | 80 |
| 542 | D. Lovren | 80 |
| 413 | J. Gomez | 80 |
| 689 | A. Lallana | 79 |
| 788 | D. Origi | 78 |
| 934 | N. Clyne | 78 |
| 1542 | Adrián | 76 |
| 2658 | R. Kent | 73 |
| 7814 | H. Kane | 67 |
| 8979 | Pedro Chirivella | 66 |
| 8762 | S. van den Berg | 66 |
| 10671 | A. Lonergan | 65 |
| 11022 | R. Brewster | 64 |
| 13104 | K. Hoever | 62 |
| 13978 | C. Jones | 61 |
| 14637 | P. Glatzel | 60 |
| 14645 | A. Lewis | 60 |
| 15347 | Y. Larouci | 59 |
| 15403 | C. Kelleher | 59 |
| short_name | overall | |
|---|---|---|
| 14 | De Gea | 89 |
| 24 | P. Pogba | 88 |
| 163 | A. Martial | 83 |
| 171 | M. Rashford | 83 |
| 280 | H. Maguire | 82 |
| 295 | Juan Mata | 82 |
| 301 | A. Sánchez | 82 |
| 319 | J. Lingard | 82 |
| 335 | V. Lindelöf | 81 |
| 347 | L. Shaw | 81 |
| 396 | N. Matić | 81 |
| 462 | E. Bailly | 80 |
| 508 | S. Romero | 80 |
| 534 | C. Smalling | 80 |
| 644 | Fred | 79 |
| 654 | P. Jones | 79 |
| 571 | A. Wan-Bissaka | 79 |
| 957 | M. Rojo | 78 |
| 1028 | S. McTominay | 77 |
| 1044 | Andreas Pereira | 77 |
| 1140 | A. Young | 77 |
| 1631 | Diogo Dalot | 75 |
| 1962 | M. Darmian | 75 |
| 2702 | T. Fosu-Mensah | 73 |
| 2661 | A. Tuanzebe | 73 |
| 3299 | D. James | 72 |
| 3689 | L. Grant | 72 |
| 6622 | A. Gomes | 68 |
| 7690 | M. Greenwood | 67 |
| 8010 | C. Borthwick-Jackson | 67 |
| 8839 | D. Mitchell | 66 |
| 9901 | T. Chong | 65 |
| 12167 | J. Garner | 63 |
def champ_mean(plchamps,df,lists):
for champ,i in zip(plchamps,range(5)):
lists[i].append(df.loc[df['club'] == champ].sort_values(by = ['overall'],ascending=False).head(25)['overall'].mean())
ratings = [[],[],[],[],[]]
for df in dfs:
champ_mean(pl_champs,df,ratings)
fig4 = go.Figure()
x = years[5:]
for i in range(5):
fig4.add_trace(go.Scatter(x=x, y=ratings[i], name=pl_champs[i],
line_shape='linear'))
fig4.update_traces(hoverinfo='text+name', mode='lines+markers')
fig4.update_layout(legend=dict(y=0.5, traceorder='reversed', font_size=16))
fig4.update_layout(title='Mean FIFA ratings across top PL teams (25 players)')
def champ_mean(plchamps,df,lists):
for champ,i in zip(plchamps,range(5)):
lists[i].append(df.loc[df['club'] == champ].sort_values(by = ['overall'],ascending=False).head(15)['overall'].mean())
ratings = [[],[],[],[],[]]
for df in dfs:
champ_mean(pl_champs,df,ratings)
fig5 = go.Figure()
x = years[5:]
for i in range(5):
fig5.add_trace(go.Scatter(x=x, y=ratings[i], name=pl_champs[i],
line_shape='linear'))
fig5.update_traces(hoverinfo='text+name', mode='lines+markers')
fig5.update_layout(legend=dict(y=0.5, traceorder='reversed', font_size=16))
fig5.update_layout(title='Mean FIFA ratings across top PL teams (15 players)')